L'objectif de ce travail est de créer un modèle d'apprentissage automatique fondé sur les réseaux de neurones convolutifs (CNN) et sur l'apprentissage par transfert pour détecter une pneumonie à partir d'images radiologiques des poumons d'un patient. En traduisant le problème dans le langage de l'apprentissage automatique, nous sommes confrontés à une tâche de classification binaire. Le prétraitement des données d'entrée, ainsi que la construction d'un modèle d'apprentissage automatique, seront effectués à l'aide de TensorFlow et Keras.
La pneumonie est une infection qui enflamme les sacs aériens d'un ou des deux poumons. Les sacs aériens peuvent se remplir de liquide ou de pus (matière purulente), ce qui provoque une toux avec du flegme ou du pus, de la fièvre, des frissons et des difficultés respiratoires. Divers organismes, dont des bactéries, des virus et des champignons, peuvent provoquer une pneumonie.
La gravité de la pneumonie peut aller de légère à mortelle. Elle est plus grave chez les nourrissons et les jeunes enfants, les personnes âgées de plus de 65 ans et les personnes ayant des problèmes de santé ou un système immunitaire affaibli.
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import cv2, os, random
import plotly
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import init_notebook_mode, plot, iplot
from tensorflow.keras.utils import image_dataset_from_directory
import tensorflow as tf
import glob
from tensorflow.keras import callbacks
import tensorflow
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from tensorflow.keras.preprocessing.image import array_to_img, img_to_array, load_img
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from mlxtend.plotting import plot_confusion_matrix
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.applications.vgg16 import VGG16
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay
from tqdm.notebook import tqdm
from termcolor import colored
from sklearn import metrics
import albumentations as A
Description de l'ensemble de données sur la pneumonie
# Silence all library warnings so deprecation notices do not clutter the output.
from warnings import filterwarnings
filterwarnings("ignore")
# Make sklearn estimator reprs show every parameter, not only non-defaults.
from sklearn import set_config
set_config(print_changed_only = False)
# Root of the chest X-ray dataset and its three pre-made splits.
directory = "../input/chest-xray-pneumonia/chest_xray/"
train_path ="../input/chest-xray-pneumonia/chest_xray/train"
valid_path = "../input/chest-xray-pneumonia/chest_xray/val"
test_path = "../input/chest-xray-pneumonia/chest_xray/test"
# Collect every .jpeg under the per-class sub-folders of each split.
# NOTE(review): train_df / test_df hold *path lists* here; later cells rebind
# the same names to pandas DataFrames built from these paths.
train_df = glob.glob("../input/chest-xray-pneumonia/chest_xray/train/**/*.jpeg")
test_df = glob.glob("../input/chest-xray-pneumonia/chest_xray/test/**/*.jpeg")
validation_df = glob.glob("../input/chest-xray-pneumonia/chest_xray/val/**/*.jpeg")
print("Il y a {} images dans l'ensemble de données d'entraînement.".format(len(train_df)))
print("Il y a {} images dans le jeu de données de test.".format(len(test_df)))
print("Il y a {} images dans l'ensemble de données de validation.".format(len(validation_df)))
Il y a 5216 images dans l'ensemble de données d'entraînement. Il y a 624 images dans le jeu de données de test. Il y a 16 images dans l'ensemble de données de validation.
# Collect every NORMAL / PNEUMONIA image path across the three dataset splits.
datasets, pneumonia_lung, normal_lung = ["train", "test", "val"], [], []
for split in datasets:
    split_path = os.path.join(directory, split)
    # IDIOM FIX: the original chained both extends in a single throwaway-tuple
    # expression statement; plain statements are clearer.
    normal_lung.extend(glob.glob(os.path.join(split_path, "NORMAL/*.jpeg")))
    pneumonia_lung.extend(glob.glob(os.path.join(split_path, "PNEUMONIA/*.jpeg")))
print("Le nombre d'images de pneumonie est {}".format(len(pneumonia_lung)))
print("Le nombre d'images de non-pneumonie est {}".format(len(normal_lung)))
Le nombre d'images de pneumonie est 4273 Le nombre d'images de non-pneumonie est 1583
# Shuffle both class lists in place, then keep 50 random examples of each
# class for the visualisation cells below.
random.shuffle(normal_lung)
random.shuffle(pneumonia_lung)
images = normal_lung[:50] + pneumonia_lung[:50]
print(images[:10])
len(images)
['../input/chest-xray-pneumonia/chest_xray/test/NORMAL/NORMAL2-IM-0364-0001.jpeg', '../input/chest-xray-pneumonia/chest_xray/train/NORMAL/IM-0240-0001.jpeg', '../input/chest-xray-pneumonia/chest_xray/train/NORMAL/IM-0437-0001.jpeg', '../input/chest-xray-pneumonia/chest_xray/test/NORMAL/IM-0101-0001.jpeg', '../input/chest-xray-pneumonia/chest_xray/train/NORMAL/NORMAL2-IM-1028-0001.jpeg', '../input/chest-xray-pneumonia/chest_xray/test/NORMAL/IM-0041-0001.jpeg', '../input/chest-xray-pneumonia/chest_xray/train/NORMAL/NORMAL2-IM-1260-0001.jpeg', '../input/chest-xray-pneumonia/chest_xray/train/NORMAL/NORMAL2-IM-0627-0001.jpeg', '../input/chest-xray-pneumonia/chest_xray/train/NORMAL/IM-0523-0001-0001.jpeg', '../input/chest-xray-pneumonia/chest_xray/train/NORMAL/NORMAL2-IM-1341-0001.jpeg']
100
Générer un DataFrame Pandas pour les chemins et l'étiquette
def générer_labels(image_paths):
    """Derive a class label for each image path.

    A path is labelled 'PNEUMONIA' when its class sub-directory is
    PNEUMONIA, otherwise 'NORMAL'.  The dataset root folder name
    'chest-xray-pneumonia' is stripped first so it can never trigger a
    spurious match.

    Parameters:
        image_paths: iterable of path strings.
    Returns:
        list of 'PNEUMONIA' / 'NORMAL' strings, same length and order.
    """
    # IDIOM FIX: the original used `_` (conventionally "unused") as a real
    # loop variable inside an append loop; a comprehension is clearer.
    return [
        'PNEUMONIA' if 'PNEUMONIA' in path.replace('chest-xray-pneumonia', '')
        else 'NORMAL'
        for path in image_paths
    ]
def construire_df(image_paths, labels):
    """Build a shuffled DataFrame of image paths with encoded labels.

    Parameters:
        image_paths: iterable of image path strings.
        labels: iterable of 'NORMAL' / 'PNEUMONIA' label strings aligned
            with image_paths (as produced by générer_labels).
    Returns:
        DataFrame with columns image_path, label, label_encoded
        (0 = NORMAL, 1 = PNEUMONIA), shuffled reproducibly.
    """
    # BUG FIX: the original re-ran générer_labels on the already-generated
    # labels (harmless only by accident) — use the supplied labels directly.
    df = pd.DataFrame({
        'image_path': image_paths,
        'label': list(labels)
    })
    # Simple vectorised encoding instead of a row-wise apply.
    df['label_encoded'] = (df['label'] != 'NORMAL').astype(int)
    # BUG FIX: reset_index(drop=True) — the original leaked the pre-shuffle
    # index as a spurious 'index' column.
    return df.sample(frac=1, random_state=42).reset_index(drop=True)
# Build the DataFrames (rebinds the earlier path lists to DataFrames).
# NOTE(review): labels are generated at the call site here AND construire_df
# derives labels again internally — verify a single application is intended.
train_df = construire_df(train_df, générer_labels(train_df))
val_df = construire_df(validation_df, générer_labels(validation_df))
test_df = construire_df(test_df, générer_labels(test_df))
# Display a 3x3 grid of NORMAL chest X-rays, each resized to 512x512.
fig = plt.figure(figsize = (20, 15))
columns, rows = 3, 3
print("NORMAL")
for i in range(1, 10):
    img = cv2.imread(normal_lung[i])
    img = cv2.resize(img, (512, 512))
    fig.add_subplot(rows, columns, i)
    plt.imshow(img)
NORMAL
# Display a 3x3 grid of PNEUMONIA chest X-rays, each resized to 512x512.
fig = plt.figure(figsize = (20, 15))
columns, rows = 3, 3
print("PNEUMONIA")
for i in range(1, 10):
    img = cv2.imread(pneumonia_lung[i])
    img = cv2.resize(img, (512, 512))
    fig.add_subplot(rows, columns, i)
    plt.imshow(img)
PNEUMONIA
GaussianBlur
# Contrast-enhancement preview: convert to HSV, then sharpen by subtracting a
# heavy Gaussian blur (unsharp-mask-style weighted sum with a +128 offset).
fig = plt.figure(figsize = (20, 15))
columns, rows = 3, 3
for i in range(1, 10):
    img = cv2.imread(images[i])
    img = cv2.resize(img, (512, 512))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # sigma = 512/10 gives a very wide blur relative to the 512px image
    img = cv2.addWeighted (img, 4, cv2.GaussianBlur(img, (0, 0), 512/10), -4, 128)
    fig.add_subplot(rows, columns, i)
    plt.imshow(img)
    plt.axis(False)
Canny edge detection:
# Edge-detection preview with the Canny detector (thresholds 80/100).
fig = plt.figure(figsize = (20, 15))
columns, rows = 3, 3
for i in range(1, 10):
    img = cv2.imread(images[i])
    img = cv2.resize(img, (512, 512))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    detected_edges = cv2.Canny(img, 80, 100)
    fig.add_subplot(rows, columns, i)
    plt.imshow(detected_edges)
Nombre d'objets de différentes classes :
def Count_label(label: str, search_paths=None) -> int:
    """Count the images of class *label* across the dataset directories.

    Parameters
    ----------
    label : str
        Class sub-directory name ('NORMAL' or 'PNEUMONIA').
    search_paths : list[str] | None
        Directories to scan; defaults to the module-level
        train/valid/test paths (backward compatible with the original
        zero-extra-argument call sites).

    Returns
    -------
    int
        Total number of entries found in ``<path>/<label>`` over all paths.
    """
    if search_paths is None:
        search_paths = [train_path, valid_path, test_path]
    total = 0
    for base in search_paths:
        # IDIOM FIX: os.path.join instead of manual '/' concatenation.
        total += len(os.listdir(os.path.join(base, label)))
    return total
# Count each class over every split and plot the (imbalanced) class balance.
COUNT_NORMAL = Count_label('NORMAL')
COUNT_PNEUMONIA = Count_label('PNEUMONIA')
fig = go.Figure()
fig.add_trace(go.Bar(
    x=['NORMAL', 'PNEUMONIA'],
    y=[COUNT_NORMAL, COUNT_PNEUMONIA],
    name='Primary Product',
    marker_color='pink',
    width=[0.4, 0.4]))
fig.update_layout(title='Classes and their number in the dataset', title_x=0.5)
# Global configuration for the tf.data pipeline below.
BATCH_SIZE = 32
EPOCHS = 20
IMAGE_SIZE = (224, 224)
# Build labelled tf.data.Dataset objects straight from the directory layout
# (one sub-folder per class).
train_dataset = image_dataset_from_directory(train_path,
    seed=42,
    batch_size=BATCH_SIZE,
    image_size=IMAGE_SIZE)
valid_dataset = image_dataset_from_directory(valid_path,
    batch_size=BATCH_SIZE,
    image_size=IMAGE_SIZE)
test_dataset = image_dataset_from_directory(test_path,
    batch_size=BATCH_SIZE,
    image_size=IMAGE_SIZE)
# Normalise pixel values from [0, 255] to [0, 1]; labels pass through.
rescale = Rescaling(scale=1.0 / 255)
train_d_without_aug = train_dataset.map(lambda image, label: (rescale(image), label))
valid_d_without_aug = valid_dataset.map(lambda image, label: (rescale(image), label))
test_d_without_aug = test_dataset.map(lambda image, label: (rescale(image), label))
Found 5216 files belonging to 2 classes. Found 16 files belonging to 2 classes. Found 624 files belonging to 2 classes.
# Build (class, image-path) DataFrames for train and test.
# NOTE(review): train_normal / train_pneumonia / test_normal / test_pneumonia
# are defined in a *later* cell of this notebook export — this cell can only
# run after that one has executed (out-of-order notebook state).
train_list = [x for x in train_normal]
train_list.extend([x for x in train_pneumonia])
df_train = pd.DataFrame(np.concatenate([['Normal']*len(train_normal) , ['Pneumonia']*len(train_pneumonia)]), columns = ['class'])
df_train['image'] = [x for x in train_list]
test_list = [x for x in test_normal]
test_list.extend([x for x in test_pneumonia])
df_test = pd.DataFrame(np.concatenate([['Normal']*len(test_normal) , ['Pneumonia']*len(test_pneumonia)]), columns = ['class'])
df_test['image'] = [x for x in test_list]
Afin d'éviter le problème de l'overfitting, nous devons élargir artificiellement notre jeu de données. Nous pouvons rendre notre jeu de données existant encore plus grand. L'idée est de modifier les données d'apprentissage par de petites transformations afin de reproduire les variations. Les approches qui modifient les données d'apprentissage de manière à changer la représentation du tableau tout en conservant l'étiquette sont connues sous le nom de techniques d'augmentation des données. Les augmentations les plus courantes sont les niveaux de gris, les inversions horizontales, les inversions verticales, les coupes aléatoires, les variations de couleur, les translations, les rotations et bien d'autres encore. En appliquant quelques-unes de ces transformations à nos données de formation, nous pouvons facilement doubler ou tripler le nombre d'exemples de formation et créer un modèle très robuste.
Pour l'augmentation des données, j'ai choisi d'appliquer un zoom aléatoire (10 %), une légère rotation aléatoire ainsi que des translations horizontales et verticales aléatoires (10 % de la taille de l'image).
print("Il y a {} images dans l'ensemble de données d'entrainement avant l'augmentation des données.".format(len(train_df)))
print("Il y a {} images dans l'ensemble de données de test avant l'augmentation des données.".format(len(test_df)))
print("Il y a {} images dans l'ensemble de données de validation avant l'augmentation des données.".format(len(validation_df)))
Il y a 5216 images dans l'ensemble de données d'entrainement avant l'augmentation des données. Il y a 624 images dans l'ensemble de données de test avant l'augmentation des données. Il y a 16 images dans l'ensemble de données de validation avant l'augmentation des données.
Maintenant, nous allons charger les images depuis les dossiers et les préparer à alimenter nos modèles.
Nous commençons par définir les générateurs de données. Avec le générateur de données d'image Keras, nous pouvons remettre à l'échelle les valeurs des pixels et appliquer des techniques de transformation aléatoire pour augmenter les données à la volée. Nous définissons deux générateurs différents. Le générateur val_datagen est utilisé pour simplement redimensionner les ensembles de validation et de test. Le générateur train_datagen inclut certaines transformations pour augmenter l'ensemble de données de formation.
Nous appliquons les générateurs sur chaque ensemble de données en utilisant la méthode flow_from_dataframe. Outre les transformations définies dans chaque générateur, les images sont également redimensionnées en fonction de l'ensemble target_size.
class CFG:
    # Central hyper-parameter configuration for the experiments.
    EPOCHS = 10
    BATCH_SIZE = 32
    SEED = 42
    TF_SEED = 768
    HEIGHT = 224                # input image height in pixels
    WIDTH = 224                 # input image width in pixels
    CHANNELS = 3                # RGB
    IMAGE_SIZE = (224, 224, 3)

# Module-level shortcuts used directly by the generators and models below.
IMG_SIZE = 224
BATCH = 32
SEED = 42  # NOTE(review): duplicates CFG.SEED — keep the two in sync
# Resolve the split directories and gather the per-class image paths.
train_path = os.path.join(directory,"train")
test_path=os.path.join(directory,"test")
train_normal = glob.glob(train_path+"/NORMAL/*.jpeg")
train_pneumonia = glob.glob(train_path+"/PNEUMONIA/*.jpeg")
test_normal = glob.glob(test_path+"/NORMAL/*.jpeg")
test_pneumonia = glob.glob(test_path+"/PNEUMONIA/*.jpeg")
# BUG FIX: the original referenced the undefined name `val_path`; the
# validation directory is bound to `valid_path` at the top of the file.
val_normal = glob.glob(valid_path+"/NORMAL/*.jpeg")
val_pneumonia = glob.glob(valid_path+"/PNEUMONIA/*.jpeg")
len(train_normal)   # notebook output: 1341
# The official validation split holds only 16 images — too few to be useful —
# so it is merged into the training pool (a proper split is carved out later
# with train_test_split).
train_normal=train_normal+val_normal
train_pneumonia=train_pneumonia+val_pneumonia
len(train_normal)   # notebook output: 1349
df_test
| class | image | |
|---|---|---|
| 0 | Normal | ../input/chest-xray-pneumonia/chest_xray/test/... |
| 1 | Normal | ../input/chest-xray-pneumonia/chest_xray/test/... |
| 2 | Normal | ../input/chest-xray-pneumonia/chest_xray/test/... |
| 3 | Normal | ../input/chest-xray-pneumonia/chest_xray/test/... |
| 4 | Normal | ../input/chest-xray-pneumonia/chest_xray/test/... |
| ... | ... | ... |
| 619 | Pneumonia | ../input/chest-xray-pneumonia/chest_xray/test/... |
| 620 | Pneumonia | ../input/chest-xray-pneumonia/chest_xray/test/... |
| 621 | Pneumonia | ../input/chest-xray-pneumonia/chest_xray/test/... |
| 622 | Pneumonia | ../input/chest-xray-pneumonia/chest_xray/test/... |
| 623 | Pneumonia | ../input/chest-xray-pneumonia/chest_xray/test/... |
624 rows × 2 columns
# Rebuild the (class, image-path) DataFrames — the train lists now include the
# merged validation images — then carve out a stratified 20% validation split.
train_list = [x for x in train_normal]
train_list.extend([x for x in train_pneumonia])
df_train = pd.DataFrame(np.concatenate([['Normal']*len(train_normal) , ['Pneumonia']*len(train_pneumonia)]), columns = ['class'])
df_train['image'] = [x for x in train_list]
test_list = [x for x in test_normal]
test_list.extend([x for x in test_pneumonia])
df_test = pd.DataFrame(np.concatenate([['Normal']*len(test_normal) , ['Pneumonia']*len(test_pneumonia)]), columns = ['class'])
df_test['image'] = [x for x in test_list]
# Stratified split keeps the Normal/Pneumonia ratio equal in train and val.
train_df, val_df = train_test_split(df_train, test_size = 0.20, random_state = SEED, stratify = df_train['class'])
# Training generator: rescaling plus light on-the-fly augmentation
# (zoom, rotation, horizontal/vertical shifts).
# NOTE(review): rotation_range is expressed in *degrees*; 0.1° is almost a
# no-op — possibly 10 was intended. Confirm before changing.
train_datagen = ImageDataGenerator(rescale=1/255.,
    zoom_range = 0.1,
    rotation_range = 0.1,
    width_shift_range = 0.1,
    height_shift_range = 0.1)
# Validation/test generator: rescaling only — evaluation data is never augmented.
val_datagen = ImageDataGenerator(rescale=1/255.)
ds_train = train_datagen.flow_from_dataframe(train_df,
    #directory=train_path, #dataframe contains the full paths
    x_col = 'image',
    y_col = 'class',
    target_size = (IMG_SIZE, IMG_SIZE),
    class_mode = 'binary',
    batch_size = BATCH,
    seed = SEED)
ds_val = val_datagen.flow_from_dataframe(val_df,
    #directory=train_path,
    x_col = 'image',
    y_col = 'class',
    target_size = (IMG_SIZE, IMG_SIZE),
    class_mode = 'binary',
    batch_size = BATCH,
    seed = SEED)
# batch_size=1 and shuffle=False so predictions stay aligned with file order.
ds_test = val_datagen.flow_from_dataframe(df_test,
    #directory=test_path,
    x_col = 'image',
    y_col = 'class',
    target_size = (IMG_SIZE, IMG_SIZE),
    class_mode = 'binary',
    batch_size = 1,
    shuffle = False)
Found 4185 validated image filenames belonging to 2 classes. Found 1047 validated image filenames belonging to 2 classes. Found 624 validated image filenames belonging to 2 classes.
# Stop training once val_loss has not improved by at least min_delta for
# 5 consecutive epochs, rolling back to the best weights seen.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    min_delta=1e-7,
    restore_best_weights=True,
)
# Multiply the learning rate by 0.2 after 2 stagnant epochs.
plateau = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor = 0.2,
    patience = 2,
    # BUG FIX: was misspelled `min_delt`, an unknown kwarg that Keras
    # silently ignored (leaving the default min_delta in effect).
    min_delta = 1e-7,
    cooldown = 0,
    verbose = 1
)
def cnn_model():
    """Build a small sequential CNN for binary pneumonia classification.

    Architecture: two pairs of 8-filter 3x3 convolutions, each followed by
    2x2 max-pooling, then flatten + dropout and a sigmoid output.

    Returns an *uncompiled* Sequential model that takes 224x224 RGB images
    and produces a single probability (1 = PNEUMONIA).
    """
    initializer = tf.keras.initializers.GlorotNormal()
    cnn_sequential = Sequential([
        layers.Input(shape=(224, 224, 3), dtype=tf.float32, name='input_image'),
        layers.Conv2D(8, kernel_size=3, activation='relu', kernel_initializer=initializer),
        layers.Conv2D(8, kernel_size=3, activation='relu', kernel_initializer=initializer),
        layers.MaxPool2D(pool_size=2, padding='valid'),
        layers.Conv2D(8, kernel_size=3, activation='relu', kernel_initializer=initializer),
        layers.Conv2D(8, kernel_size=3, activation='relu', kernel_initializer=initializer),
        layers.MaxPool2D(pool_size=2),
        layers.Flatten(),
        layers.Dropout(0.2),
        # BUG FIX: a binary head should be a single sigmoid unit (to pair
        # with class_mode='binary' / binary_crossentropy); the original used
        # Dense(2) with sigmoid, i.e. two independent per-class sigmoids.
        layers.Dense(1, activation='sigmoid', kernel_initializer=initializer)
    ], name='cnn_sequential_model')
    return cnn_sequential
# BUG FIX: bare `keras` is never imported in this file; use the tf.keras alias.
tf.keras.backend.clear_session()
model_cnn = cnn_model()
model_cnn.summary()
Model: "cnn_sequential_model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 222, 222, 8) 224 _________________________________________________________________ conv2d_1 (Conv2D) (None, 220, 220, 8) 584 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 110, 110, 8) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 108, 108, 8) 584 _________________________________________________________________ conv2d_3 (Conv2D) (None, 106, 106, 8) 584 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 53, 53, 8) 0 _________________________________________________________________ flatten (Flatten) (None, 22472) 0 _________________________________________________________________ dropout (Dropout) (None, 22472) 0 _________________________________________________________________ dense (Dense) (None, 2) 44946 ================================================================= Total params: 46,922 Trainable params: 46,922 Non-trainable params: 0 _________________________________________________________________
# Early-stopping / LR-reduction callbacks (alternative set using tf.keras
# directly) plus run-level constants.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True)
reduce_lr_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    patience=2,
    factor=0.1,
    verbose=1)
EPOCHS = CFG.EPOCHS  # rebinds the earlier EPOCHS = 20 to the CFG value (10)
CALLBACKS = [early_stopping_callback, reduce_lr_callback]
METRICS = ['accuracy']
# NOTE(review): this cell duplicates the earlier early_stopping/plateau
# definitions (typical of a re-run notebook); the bindings are identical.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    min_delta=1e-7,
    restore_best_weights=True,
)
plateau = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor = 0.2,
    patience = 2,
    # BUG FIX: was misspelled `min_delt`, an unknown kwarg that Keras
    # silently ignored (leaving the default min_delta in effect).
    min_delta = 1e-7,
    cooldown = 0,
    verbose = 1
)
def get_model():
    """Build the custom three-block CNN used as the from-scratch baseline.

    Input: (IMG_SIZE, IMG_SIZE, 3) RGB images scaled to [0, 1].
    Output: a single sigmoid probability (1 = Pneumonia).
    Returns an *uncompiled* Keras functional model.
    """
    #Input shape = [width, height, color channels]
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    # Block One: 16 filters, BatchNorm before the activation, 20% dropout.
    x = layers.Conv2D(filters=16, kernel_size=3, padding='valid')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPool2D()(x)
    x = layers.Dropout(0.2)(x)
    # Block Two: 32 filters.
    x = layers.Conv2D(filters=32, kernel_size=3, padding='valid')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPool2D()(x)
    x = layers.Dropout(0.2)(x)
    # Block Three: two stacked 64-filter convolutions, heavier dropout.
    x = layers.Conv2D(filters=64, kernel_size=3, padding='valid')(x)
    x = layers.Conv2D(filters=64, kernel_size=3, padding='valid')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPool2D()(x)
    x = layers.Dropout(0.4)(x)
    # Head
    x = layers.Flatten()(x)
    x = layers.Dense(64, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    #Final Layer (Output)
    output = layers.Dense(1, activation='sigmoid')(x)
    # BUG FIX: bare `keras` is never imported in this file; use tf.keras.
    model = tf.keras.Model(inputs=[inputs], outputs=output)
    return model
# BUG FIX: bare `keras` is never imported in this file; use the tf.keras alias.
tf.keras.backend.clear_session()
model = get_model()
# Binary classification: binary cross-entropy with a small Adam learning rate.
model.compile(loss='binary_crossentropy'
    , optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5), metrics='binary_accuracy')
model.summary()
Model: "model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_1 (InputLayer) [(None, 224, 224, 3)] 0 _________________________________________________________________ conv2d (Conv2D) (None, 222, 222, 16) 448 _________________________________________________________________ batch_normalization (BatchNo (None, 222, 222, 16) 64 _________________________________________________________________ activation (Activation) (None, 222, 222, 16) 0 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 111, 111, 16) 0 _________________________________________________________________ dropout (Dropout) (None, 111, 111, 16) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 109, 109, 32) 4640 _________________________________________________________________ batch_normalization_1 (Batch (None, 109, 109, 32) 128 _________________________________________________________________ activation_1 (Activation) (None, 109, 109, 32) 0 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 54, 54, 32) 0 _________________________________________________________________ dropout_1 (Dropout) (None, 54, 54, 32) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 52, 52, 64) 18496 _________________________________________________________________ conv2d_3 (Conv2D) (None, 50, 50, 64) 36928 _________________________________________________________________ batch_normalization_2 (Batch (None, 50, 50, 64) 256 _________________________________________________________________ activation_2 (Activation) (None, 50, 50, 64) 0 _________________________________________________________________ max_pooling2d_2 (MaxPooling2 (None, 25, 25, 64) 0 _________________________________________________________________ dropout_2 
(Dropout) (None, 25, 25, 64) 0 _________________________________________________________________ flatten (Flatten) (None, 40000) 0 _________________________________________________________________ dense (Dense) (None, 64) 2560064 _________________________________________________________________ dropout_3 (Dropout) (None, 64) 0 _________________________________________________________________ dense_1 (Dense) (None, 1) 65 ================================================================= Total params: 2,621,089 Trainable params: 2,620,865 Non-trainable params: 224 _________________________________________________________________
# Train the custom CNN with early stopping and LR reduction on plateau.
# NOTE(review): steps_per_epoch / validation_steps are floats here
# (len(...)/BATCH); recent Keras versions expect integers — confirm, or wrap
# the divisions in math.ceil.
history = model.fit(ds_train,
    batch_size = BATCH, epochs = 50,
    validation_data=ds_val,
    callbacks=[early_stopping, plateau],
    steps_per_epoch=(len(train_df)/BATCH),
    validation_steps=(len(val_df)/BATCH));
Epoch 1/50 130/130 [==============================] - 104s 794ms/step - loss: 0.5651 - binary_accuracy: 0.7403 - val_loss: 0.7057 - val_binary_accuracy: 0.7421 Epoch 2/50 130/130 [==============================] - 102s 781ms/step - loss: 0.3548 - binary_accuracy: 0.8385 - val_loss: 1.2114 - val_binary_accuracy: 0.7421 Epoch 3/50 130/130 [==============================] - 103s 787ms/step - loss: 0.2705 - binary_accuracy: 0.8786 - val_loss: 1.4785 - val_binary_accuracy: 0.7421 Epoch 00003: ReduceLROnPlateau reducing learning rate to 5.9999998484272515e-06. Epoch 4/50 130/130 [==============================] - 103s 789ms/step - loss: 0.2333 - binary_accuracy: 0.9016 - val_loss: 1.0161 - val_binary_accuracy: 0.7421 Epoch 5/50 130/130 [==============================] - 103s 790ms/step - loss: 0.2170 - binary_accuracy: 0.9109 - val_loss: 0.3756 - val_binary_accuracy: 0.8233 Epoch 6/50 130/130 [==============================] - 102s 782ms/step - loss: 0.2214 - binary_accuracy: 0.9125 - val_loss: 0.1668 - val_binary_accuracy: 0.9322 Epoch 7/50 130/130 [==============================] - 104s 796ms/step - loss: 0.2123 - binary_accuracy: 0.9185 - val_loss: 0.1392 - val_binary_accuracy: 0.9532 Epoch 8/50 130/130 [==============================] - 102s 784ms/step - loss: 0.2050 - binary_accuracy: 0.9180 - val_loss: 0.1520 - val_binary_accuracy: 0.9427 Epoch 9/50 130/130 [==============================] - 102s 780ms/step - loss: 0.2023 - binary_accuracy: 0.9176 - val_loss: 0.1334 - val_binary_accuracy: 0.9551 Epoch 10/50 130/130 [==============================] - 102s 780ms/step - loss: 0.1877 - binary_accuracy: 0.9266 - val_loss: 0.1344 - val_binary_accuracy: 0.9522 Epoch 11/50 130/130 [==============================] - 102s 778ms/step - loss: 0.1926 - binary_accuracy: 0.9262 - val_loss: 0.1466 - val_binary_accuracy: 0.9417 Epoch 00011: ReduceLROnPlateau reducing learning rate to 1.1999999514955563e-06. 
Epoch 12/50 130/130 [==============================] - 102s 783ms/step - loss: 0.1972 - binary_accuracy: 0.9240 - val_loss: 0.1378 - val_binary_accuracy: 0.9465 Epoch 13/50 130/130 [==============================] - 101s 776ms/step - loss: 0.1862 - binary_accuracy: 0.9271 - val_loss: 0.1383 - val_binary_accuracy: 0.9465 Epoch 00013: ReduceLROnPlateau reducing learning rate to 2.3999998575163774e-07. Epoch 14/50 130/130 [==============================] - 101s 774ms/step - loss: 0.1867 - binary_accuracy: 0.9286 - val_loss: 0.1367 - val_binary_accuracy: 0.9475
import seaborn as sns
# Learning curve: training vs validation loss per epoch.
fig, ax = plt.subplots(figsize=(20,8))
sns.lineplot(x = history.epoch, y = history.history['loss'])
sns.lineplot(x = history.epoch, y = history.history['val_loss'])
ax.set_title('Learning Curve (Loss)')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.set_ylim(0, 0.5)
ax.legend(['train', 'val'], loc='best')
plt.show()
# Learning curve: training vs validation binary accuracy per epoch.
fig, ax = plt.subplots(figsize=(20,8))
sns.lineplot(x = history.epoch, y = history.history['binary_accuracy'])
sns.lineplot(x = history.epoch, y = history.history['val_binary_accuracy'])
ax.set_title('Learning Curve (Accuracy)')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylim(0.80, 1.0)
ax.legend(['train', 'val'], loc='best')
plt.show()
# Validation metrics with the restored best weights (loss, binary accuracy).
score = model.evaluate(ds_val, steps = len(val_df)/BATCH, verbose = 0)
print('Val loss:', score[0])
print('Val accuracy:', score[1])
Val loss: 0.13340571522712708 Val accuracy: 0.9551098346710205
Prédiction sur l'ensemble de test :
# Test-set metrics; ds_test uses batch_size=1, so steps equals the image count.
score1 = model.evaluate(ds_test, steps = len(df_test), verbose = 0)
print('Test loss:', score1[0])
print('Test accuracy:', score1[1])
Test loss: 0.516758382320404 Test accuracy: 0.7884615659713745
# Persist the trained model in TensorFlow SavedModel format.
model.save("/kaggle/working/model")
# Predict probabilities over the (unshuffled) test generator.
predictions_cnn = model.predict(ds_test, steps=len(ds_test), verbose=0)
# Threshold the sigmoid output at 0.5 to obtain hard class labels.
pred_labels= np.where(predictions_cnn>0.5, 1, 0)
# BUG FIX: the original called tf.argmax on the *model* object (a TypeError);
# the class indices must be derived from the predictions.
res_test_predictions = tf.argmax(predictions_cnn, axis=1)
# BUG FIX: Y_test was never defined anywhere in the file; ds_test was built
# with shuffle=False, so its `.classes` attribute aligns with predictions.
Y_test = ds_test.classes
print("Test Accuracy CNN: ", accuracy_score(Y_test, pred_labels))
Test Accuracy CNN: 0.7884615384615384
# Per-class precision/recall/F1 on the test set (0 = NORMAL, 1 = PNEUMONIA).
# NOTE(review): Y_test must be defined before this cell (e.g. from
# ds_test.classes) — it is not bound anywhere earlier in this file.
print(metrics.classification_report(Y_test, pred_labels, labels = [0, 1]))
precision recall f1-score support
0 0.96 0.45 0.62 234
1 0.75 0.99 0.85 390
accuracy 0.79 624
macro avg 0.86 0.72 0.74 624
weighted avg 0.83 0.79 0.76 624
# BUG FIX: the original bound the result to the name `confusion_matrix`,
# shadowing sklearn's `confusion_matrix` function imported at the top of
# the file; use a distinct local name.
cm = metrics.confusion_matrix(Y_test, pred_labels)
# Annotated heatmap of the confusion matrix (rows = truth, cols = prediction).
sns.heatmap(cm, annot=True, fmt="d")
plt.xlabel("Predicted Label", fontsize= 12)
plt.ylabel("True Label", fontsize= 12)
plt.show()
Transfer Learning : ResNet152V2
Un réseau neuronal résiduel (ResNet) empile des blocs résiduels les uns sur les autres pour former un réseau. ResNet, abréviation de Residual Network, est un type spécifique de réseau neuronal qui a été introduit en 2015 par Kaiming He, Xiangyu Zhang, Shaoqing Ren et Jian Sun dans leur article "Deep Residual Learning for Image Recognition". Les réseaux résiduels profonds, comme le populaire modèle ResNet-50, sont des réseaux neuronaux convolutifs (CNN) d'une profondeur de 50 couches.
# Load ResNet152V2 pretrained on ImageNet, without its classification head,
# and freeze every layer so only the new head is trained (feature extraction).
base_model = tf.keras.applications.ResNet152V2(
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False)
base_model.trainable = False
def get_pretrained():
    """Attach a small classification head to the frozen ResNet152V2 base.

    Input: (IMG_SIZE, IMG_SIZE, 3) RGB images.
    Output: a single sigmoid probability (1 = Pneumonia).
    Returns an *uncompiled* Keras functional model.
    """
    #Input shape = [width, height, color channels]
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    x = base_model(inputs)
    # Head: pool the final feature map, then a small dense layer + dropout.
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.1)(x)
    #Final Layer (Output)
    output = layers.Dense(1, activation='sigmoid')(x)
    # BUG FIX: bare `keras` is never imported in this file; use tf.keras.
    model = tf.keras.Model(inputs=[inputs], outputs=output)
    return model
# Start from a clean graph, then compile for binary classification. A small
# learning rate suffices because only the head is trainable here.
keras.backend.clear_session()
model_pretrained = get_pretrained()
model_pretrained.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
    metrics='binary_accuracy',
)
model_pretrained.summary()
Model: "model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_1 (InputLayer) [(None, 224, 224, 3)] 0 _________________________________________________________________ resnet152v2 (Functional) (None, 7, 7, 2048) 58331648 _________________________________________________________________ global_average_pooling2d (Gl (None, 2048) 0 _________________________________________________________________ dense (Dense) (None, 128) 262272 _________________________________________________________________ dropout (Dropout) (None, 128) 0 _________________________________________________________________ dense_1 (Dense) (None, 1) 129 ================================================================= Total params: 58,594,049 Trainable params: 262,401 Non-trainable params: 58,331,648 _________________________________________________________________
# Train the classification head (backbone frozen).
# NOTE(review): steps_per_epoch and validation_steps are floats; Keras expects
# integers. The run log shows 130 steps/epoch, so this presumably truncates --
# int(np.ceil(len(train_df)/BATCH)) would be explicit. TODO confirm.
history1 = model_pretrained.fit(ds_train,
batch_size = BATCH, epochs = 50,
validation_data=ds_val,
callbacks=[early_stopping, plateau],
steps_per_epoch=(len(train_df)/BATCH),
validation_steps=(len(val_df)/BATCH));
Epoch 1/50 130/130 [==============================] - 124s 886ms/step - loss: 0.3715 - binary_accuracy: 0.8478 - val_loss: 0.2132 - val_binary_accuracy: 0.9255 Epoch 2/50 130/130 [==============================] - 111s 851ms/step - loss: 0.2007 - binary_accuracy: 0.9257 - val_loss: 0.1615 - val_binary_accuracy: 0.9370 Epoch 3/50 130/130 [==============================] - 113s 862ms/step - loss: 0.1608 - binary_accuracy: 0.9403 - val_loss: 0.1352 - val_binary_accuracy: 0.9465 Epoch 4/50 130/130 [==============================] - 113s 861ms/step - loss: 0.1496 - binary_accuracy: 0.9467 - val_loss: 0.1268 - val_binary_accuracy: 0.9561 Epoch 5/50 130/130 [==============================] - 113s 863ms/step - loss: 0.1325 - binary_accuracy: 0.9498 - val_loss: 0.1123 - val_binary_accuracy: 0.9551 Epoch 6/50 130/130 [==============================] - 112s 859ms/step - loss: 0.1280 - binary_accuracy: 0.9553 - val_loss: 0.1062 - val_binary_accuracy: 0.9570 Epoch 7/50 130/130 [==============================] - 112s 852ms/step - loss: 0.1255 - binary_accuracy: 0.9536 - val_loss: 0.1025 - val_binary_accuracy: 0.9580 Epoch 8/50 130/130 [==============================] - 113s 862ms/step - loss: 0.1144 - binary_accuracy: 0.9570 - val_loss: 0.1006 - val_binary_accuracy: 0.9647 Epoch 9/50 130/130 [==============================] - 113s 864ms/step - loss: 0.1100 - binary_accuracy: 0.9620 - val_loss: 0.0955 - val_binary_accuracy: 0.9656 Epoch 10/50 130/130 [==============================] - 111s 850ms/step - loss: 0.1105 - binary_accuracy: 0.9596 - val_loss: 0.0921 - val_binary_accuracy: 0.9647 Epoch 11/50 130/130 [==============================] - 112s 859ms/step - loss: 0.1080 - binary_accuracy: 0.9591 - val_loss: 0.0937 - val_binary_accuracy: 0.9675 Epoch 12/50 130/130 [==============================] - 111s 848ms/step - loss: 0.1076 - binary_accuracy: 0.9589 - val_loss: 0.0912 - val_binary_accuracy: 0.9666 Epoch 13/50 130/130 [==============================] - 112s 857ms/step - 
loss: 0.1006 - binary_accuracy: 0.9634 - val_loss: 0.0869 - val_binary_accuracy: 0.9685 Epoch 14/50 130/130 [==============================] - 111s 848ms/step - loss: 0.1033 - binary_accuracy: 0.9642 - val_loss: 0.0918 - val_binary_accuracy: 0.9675 Epoch 15/50 130/130 [==============================] - 113s 860ms/step - loss: 0.1020 - binary_accuracy: 0.9642 - val_loss: 0.0834 - val_binary_accuracy: 0.9723 Epoch 16/50 130/130 [==============================] - 111s 849ms/step - loss: 0.1020 - binary_accuracy: 0.9622 - val_loss: 0.0905 - val_binary_accuracy: 0.9656 Epoch 17/50 130/130 [==============================] - 112s 856ms/step - loss: 0.0982 - binary_accuracy: 0.9615 - val_loss: 0.0930 - val_binary_accuracy: 0.9637 Epoch 00017: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06. Epoch 18/50 130/130 [==============================] - 112s 854ms/step - loss: 0.0939 - binary_accuracy: 0.9689 - val_loss: 0.0897 - val_binary_accuracy: 0.9675 Epoch 19/50 130/130 [==============================] - 111s 850ms/step - loss: 0.0860 - binary_accuracy: 0.9711 - val_loss: 0.0887 - val_binary_accuracy: 0.9675 Epoch 00019: ReduceLROnPlateau reducing learning rate to 1.9999999494757505e-06. Epoch 20/50 130/130 [==============================] - 111s 851ms/step - loss: 0.0911 - binary_accuracy: 0.9668 - val_loss: 0.0890 - val_binary_accuracy: 0.9675
# Learning curves for the head-only training run (history1): loss then accuracy.
epochs_ran = history1.epoch

fig, ax = plt.subplots(figsize=(20, 8))
sns.lineplot(x=epochs_ran, y=history1.history['loss'])
sns.lineplot(x=epochs_ran, y=history1.history['val_loss'])
ax.set_title('Learning Curve (Loss)')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.set_ylim(0, 0.5)
ax.legend(['train', 'val'], loc='best')
plt.show()

fig, ax = plt.subplots(figsize=(20, 8))
sns.lineplot(x=epochs_ran, y=history1.history['binary_accuracy'])
sns.lineplot(x=epochs_ran, y=history1.history['val_binary_accuracy'])
ax.set_title('Learning Curve (Accuracy)')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylim(0.80, 1.0)
ax.legend(['train', 'val'], loc='best')
plt.show()
# Validation metrics after head-only training.
score = model_pretrained.evaluate(ds_val, steps=len(val_df) / BATCH, verbose=0)
print(f'Val loss: {score[0]}')
print(f'Val accuracy: {score[1]}')
Val loss: 0.12076888233423233 Val accuracy: 0.9560649394989014
# Evaluate the transfer-learning model on the held-out test set.
# NOTE(review): steps = len(df_test) looks like the number of *samples*, not
# batches; if ds_test is a looping generator this re-visits batches -- confirm
# whether len(df_test)/BATCH was intended.
score1 = model_pretrained.evaluate(ds_test, steps = len(df_test), verbose = 0)
print('Test loss:', score1[0])
print('Test accuracy:', score1[1])
Test loss: 0.329448938369751 Test accuracy: 0.8733974099159241
# Persist the head-trained model before fine-tuning begins.
# NOTE(review): "modelresent" is likely a typo for "modelresnet"; kept as-is
# since changing the path would change where downstream code looks for it.
model_pretrained.save("/kaggle/working/modelresent")
2023-01-15 19:50:24.622263: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
# Unfreeze the backbone, then re-freeze everything except the last 13 layers
# (the final conv5_block3 unit plus post_bn/post_relu) for fine-tuning.
base_model.trainable = True
for frozen in base_model.layers[:-13]:
    frozen.trainable = False
# Show which layers will actually be trained.
for idx, lyr in enumerate(base_model.layers):
    print(idx, lyr.name, lyr.trainable)
0 input_3 False 1 conv1_pad False 2 conv1_conv False 3 pool1_pad False 4 pool1_pool False 5 conv2_block1_preact_bn False 6 conv2_block1_preact_relu False 7 conv2_block1_1_conv False 8 conv2_block1_1_bn False 9 conv2_block1_1_relu False 10 conv2_block1_2_pad False 11 conv2_block1_2_conv False 12 conv2_block1_2_bn False 13 conv2_block1_2_relu False 14 conv2_block1_0_conv False 15 conv2_block1_3_conv False 16 conv2_block1_out False 17 conv2_block2_preact_bn False 18 conv2_block2_preact_relu False 19 conv2_block2_1_conv False 20 conv2_block2_1_bn False 21 conv2_block2_1_relu False 22 conv2_block2_2_pad False 23 conv2_block2_2_conv False 24 conv2_block2_2_bn False 25 conv2_block2_2_relu False 26 conv2_block2_3_conv False 27 conv2_block2_out False 28 conv2_block3_preact_bn False 29 conv2_block3_preact_relu False 30 conv2_block3_1_conv False 31 conv2_block3_1_bn False 32 conv2_block3_1_relu False 33 conv2_block3_2_pad False 34 conv2_block3_2_conv False 35 conv2_block3_2_bn False 36 conv2_block3_2_relu False 37 max_pooling2d_3 False 38 conv2_block3_3_conv False 39 conv2_block3_out False 40 conv3_block1_preact_bn False 41 conv3_block1_preact_relu False 42 conv3_block1_1_conv False 43 conv3_block1_1_bn False 44 conv3_block1_1_relu False 45 conv3_block1_2_pad False 46 conv3_block1_2_conv False 47 conv3_block1_2_bn False 48 conv3_block1_2_relu False 49 conv3_block1_0_conv False 50 conv3_block1_3_conv False 51 conv3_block1_out False 52 conv3_block2_preact_bn False 53 conv3_block2_preact_relu False 54 conv3_block2_1_conv False 55 conv3_block2_1_bn False 56 conv3_block2_1_relu False 57 conv3_block2_2_pad False 58 conv3_block2_2_conv False 59 conv3_block2_2_bn False 60 conv3_block2_2_relu False 61 conv3_block2_3_conv False 62 conv3_block2_out False 63 conv3_block3_preact_bn False 64 conv3_block3_preact_relu False 65 conv3_block3_1_conv False 66 conv3_block3_1_bn False 67 conv3_block3_1_relu False 68 conv3_block3_2_pad False 69 conv3_block3_2_conv False 70 conv3_block3_2_bn False 
71 conv3_block3_2_relu False 72 conv3_block3_3_conv False 73 conv3_block3_out False 74 conv3_block4_preact_bn False 75 conv3_block4_preact_relu False 76 conv3_block4_1_conv False 77 conv3_block4_1_bn False 78 conv3_block4_1_relu False 79 conv3_block4_2_pad False 80 conv3_block4_2_conv False 81 conv3_block4_2_bn False 82 conv3_block4_2_relu False 83 conv3_block4_3_conv False 84 conv3_block4_out False 85 conv3_block5_preact_bn False 86 conv3_block5_preact_relu False 87 conv3_block5_1_conv False 88 conv3_block5_1_bn False 89 conv3_block5_1_relu False 90 conv3_block5_2_pad False 91 conv3_block5_2_conv False 92 conv3_block5_2_bn False 93 conv3_block5_2_relu False 94 conv3_block5_3_conv False 95 conv3_block5_out False 96 conv3_block6_preact_bn False 97 conv3_block6_preact_relu False 98 conv3_block6_1_conv False 99 conv3_block6_1_bn False 100 conv3_block6_1_relu False 101 conv3_block6_2_pad False 102 conv3_block6_2_conv False 103 conv3_block6_2_bn False 104 conv3_block6_2_relu False 105 conv3_block6_3_conv False 106 conv3_block6_out False 107 conv3_block7_preact_bn False 108 conv3_block7_preact_relu False 109 conv3_block7_1_conv False 110 conv3_block7_1_bn False 111 conv3_block7_1_relu False 112 conv3_block7_2_pad False 113 conv3_block7_2_conv False 114 conv3_block7_2_bn False 115 conv3_block7_2_relu False 116 conv3_block7_3_conv False 117 conv3_block7_out False 118 conv3_block8_preact_bn False 119 conv3_block8_preact_relu False 120 conv3_block8_1_conv False 121 conv3_block8_1_bn False 122 conv3_block8_1_relu False 123 conv3_block8_2_pad False 124 conv3_block8_2_conv False 125 conv3_block8_2_bn False 126 conv3_block8_2_relu False 127 max_pooling2d_4 False 128 conv3_block8_3_conv False 129 conv3_block8_out False 130 conv4_block1_preact_bn False 131 conv4_block1_preact_relu False 132 conv4_block1_1_conv False 133 conv4_block1_1_bn False 134 conv4_block1_1_relu False 135 conv4_block1_2_pad False 136 conv4_block1_2_conv False 137 conv4_block1_2_bn False 138 
conv4_block1_2_relu False 139 conv4_block1_0_conv False 140 conv4_block1_3_conv False 141 conv4_block1_out False 142 conv4_block2_preact_bn False 143 conv4_block2_preact_relu False 144 conv4_block2_1_conv False 145 conv4_block2_1_bn False 146 conv4_block2_1_relu False 147 conv4_block2_2_pad False 148 conv4_block2_2_conv False 149 conv4_block2_2_bn False 150 conv4_block2_2_relu False 151 conv4_block2_3_conv False 152 conv4_block2_out False 153 conv4_block3_preact_bn False 154 conv4_block3_preact_relu False 155 conv4_block3_1_conv False 156 conv4_block3_1_bn False 157 conv4_block3_1_relu False 158 conv4_block3_2_pad False 159 conv4_block3_2_conv False 160 conv4_block3_2_bn False 161 conv4_block3_2_relu False 162 conv4_block3_3_conv False 163 conv4_block3_out False 164 conv4_block4_preact_bn False 165 conv4_block4_preact_relu False 166 conv4_block4_1_conv False 167 conv4_block4_1_bn False 168 conv4_block4_1_relu False 169 conv4_block4_2_pad False 170 conv4_block4_2_conv False 171 conv4_block4_2_bn False 172 conv4_block4_2_relu False 173 conv4_block4_3_conv False 174 conv4_block4_out False 175 conv4_block5_preact_bn False 176 conv4_block5_preact_relu False 177 conv4_block5_1_conv False 178 conv4_block5_1_bn False 179 conv4_block5_1_relu False 180 conv4_block5_2_pad False 181 conv4_block5_2_conv False 182 conv4_block5_2_bn False 183 conv4_block5_2_relu False 184 conv4_block5_3_conv False 185 conv4_block5_out False 186 conv4_block6_preact_bn False 187 conv4_block6_preact_relu False 188 conv4_block6_1_conv False 189 conv4_block6_1_bn False 190 conv4_block6_1_relu False 191 conv4_block6_2_pad False 192 conv4_block6_2_conv False 193 conv4_block6_2_bn False 194 conv4_block6_2_relu False 195 conv4_block6_3_conv False 196 conv4_block6_out False 197 conv4_block7_preact_bn False 198 conv4_block7_preact_relu False 199 conv4_block7_1_conv False 200 conv4_block7_1_bn False 201 conv4_block7_1_relu False 202 conv4_block7_2_pad False 203 conv4_block7_2_conv False 204 conv4_block7_2_bn 
False 205 conv4_block7_2_relu False 206 conv4_block7_3_conv False 207 conv4_block7_out False 208 conv4_block8_preact_bn False 209 conv4_block8_preact_relu False 210 conv4_block8_1_conv False 211 conv4_block8_1_bn False 212 conv4_block8_1_relu False 213 conv4_block8_2_pad False 214 conv4_block8_2_conv False 215 conv4_block8_2_bn False 216 conv4_block8_2_relu False 217 conv4_block8_3_conv False 218 conv4_block8_out False 219 conv4_block9_preact_bn False 220 conv4_block9_preact_relu False 221 conv4_block9_1_conv False 222 conv4_block9_1_bn False 223 conv4_block9_1_relu False 224 conv4_block9_2_pad False 225 conv4_block9_2_conv False 226 conv4_block9_2_bn False 227 conv4_block9_2_relu False 228 conv4_block9_3_conv False 229 conv4_block9_out False 230 conv4_block10_preact_bn False 231 conv4_block10_preact_relu False 232 conv4_block10_1_conv False 233 conv4_block10_1_bn False 234 conv4_block10_1_relu False 235 conv4_block10_2_pad False 236 conv4_block10_2_conv False 237 conv4_block10_2_bn False 238 conv4_block10_2_relu False 239 conv4_block10_3_conv False 240 conv4_block10_out False 241 conv4_block11_preact_bn False 242 conv4_block11_preact_relu False 243 conv4_block11_1_conv False 244 conv4_block11_1_bn False 245 conv4_block11_1_relu False 246 conv4_block11_2_pad False 247 conv4_block11_2_conv False 248 conv4_block11_2_bn False 249 conv4_block11_2_relu False 250 conv4_block11_3_conv False 251 conv4_block11_out False 252 conv4_block12_preact_bn False 253 conv4_block12_preact_relu False 254 conv4_block12_1_conv False 255 conv4_block12_1_bn False 256 conv4_block12_1_relu False 257 conv4_block12_2_pad False 258 conv4_block12_2_conv False 259 conv4_block12_2_bn False 260 conv4_block12_2_relu False 261 conv4_block12_3_conv False 262 conv4_block12_out False 263 conv4_block13_preact_bn False 264 conv4_block13_preact_relu False 265 conv4_block13_1_conv False 266 conv4_block13_1_bn False 267 conv4_block13_1_relu False 268 conv4_block13_2_pad False 269 conv4_block13_2_conv False 
270 conv4_block13_2_bn False 271 conv4_block13_2_relu False 272 conv4_block13_3_conv False 273 conv4_block13_out False 274 conv4_block14_preact_bn False 275 conv4_block14_preact_relu False 276 conv4_block14_1_conv False 277 conv4_block14_1_bn False 278 conv4_block14_1_relu False 279 conv4_block14_2_pad False 280 conv4_block14_2_conv False 281 conv4_block14_2_bn False 282 conv4_block14_2_relu False 283 conv4_block14_3_conv False 284 conv4_block14_out False 285 conv4_block15_preact_bn False 286 conv4_block15_preact_relu False 287 conv4_block15_1_conv False 288 conv4_block15_1_bn False 289 conv4_block15_1_relu False 290 conv4_block15_2_pad False 291 conv4_block15_2_conv False 292 conv4_block15_2_bn False 293 conv4_block15_2_relu False 294 conv4_block15_3_conv False 295 conv4_block15_out False 296 conv4_block16_preact_bn False 297 conv4_block16_preact_relu False 298 conv4_block16_1_conv False 299 conv4_block16_1_bn False 300 conv4_block16_1_relu False 301 conv4_block16_2_pad False 302 conv4_block16_2_conv False 303 conv4_block16_2_bn False 304 conv4_block16_2_relu False 305 conv4_block16_3_conv False 306 conv4_block16_out False 307 conv4_block17_preact_bn False 308 conv4_block17_preact_relu False 309 conv4_block17_1_conv False 310 conv4_block17_1_bn False 311 conv4_block17_1_relu False 312 conv4_block17_2_pad False 313 conv4_block17_2_conv False 314 conv4_block17_2_bn False 315 conv4_block17_2_relu False 316 conv4_block17_3_conv False 317 conv4_block17_out False 318 conv4_block18_preact_bn False 319 conv4_block18_preact_relu False 320 conv4_block18_1_conv False 321 conv4_block18_1_bn False 322 conv4_block18_1_relu False 323 conv4_block18_2_pad False 324 conv4_block18_2_conv False 325 conv4_block18_2_bn False 326 conv4_block18_2_relu False 327 conv4_block18_3_conv False 328 conv4_block18_out False 329 conv4_block19_preact_bn False 330 conv4_block19_preact_relu False 331 conv4_block19_1_conv False 332 conv4_block19_1_bn False 333 conv4_block19_1_relu False 334 
conv4_block19_2_pad False 335 conv4_block19_2_conv False 336 conv4_block19_2_bn False 337 conv4_block19_2_relu False 338 conv4_block19_3_conv False 339 conv4_block19_out False 340 conv4_block20_preact_bn False 341 conv4_block20_preact_relu False 342 conv4_block20_1_conv False 343 conv4_block20_1_bn False 344 conv4_block20_1_relu False 345 conv4_block20_2_pad False 346 conv4_block20_2_conv False 347 conv4_block20_2_bn False 348 conv4_block20_2_relu False 349 conv4_block20_3_conv False 350 conv4_block20_out False 351 conv4_block21_preact_bn False 352 conv4_block21_preact_relu False 353 conv4_block21_1_conv False 354 conv4_block21_1_bn False 355 conv4_block21_1_relu False 356 conv4_block21_2_pad False 357 conv4_block21_2_conv False 358 conv4_block21_2_bn False 359 conv4_block21_2_relu False 360 conv4_block21_3_conv False 361 conv4_block21_out False 362 conv4_block22_preact_bn False 363 conv4_block22_preact_relu False 364 conv4_block22_1_conv False 365 conv4_block22_1_bn False 366 conv4_block22_1_relu False 367 conv4_block22_2_pad False 368 conv4_block22_2_conv False 369 conv4_block22_2_bn False 370 conv4_block22_2_relu False 371 conv4_block22_3_conv False 372 conv4_block22_out False 373 conv4_block23_preact_bn False 374 conv4_block23_preact_relu False 375 conv4_block23_1_conv False 376 conv4_block23_1_bn False 377 conv4_block23_1_relu False 378 conv4_block23_2_pad False 379 conv4_block23_2_conv False 380 conv4_block23_2_bn False 381 conv4_block23_2_relu False 382 conv4_block23_3_conv False 383 conv4_block23_out False 384 conv4_block24_preact_bn False 385 conv4_block24_preact_relu False 386 conv4_block24_1_conv False 387 conv4_block24_1_bn False 388 conv4_block24_1_relu False 389 conv4_block24_2_pad False 390 conv4_block24_2_conv False 391 conv4_block24_2_bn False 392 conv4_block24_2_relu False 393 conv4_block24_3_conv False 394 conv4_block24_out False 395 conv4_block25_preact_bn False 396 conv4_block25_preact_relu False 397 conv4_block25_1_conv False 398 
conv4_block25_1_bn False 399 conv4_block25_1_relu False 400 conv4_block25_2_pad False 401 conv4_block25_2_conv False 402 conv4_block25_2_bn False 403 conv4_block25_2_relu False 404 conv4_block25_3_conv False 405 conv4_block25_out False 406 conv4_block26_preact_bn False 407 conv4_block26_preact_relu False 408 conv4_block26_1_conv False 409 conv4_block26_1_bn False 410 conv4_block26_1_relu False 411 conv4_block26_2_pad False 412 conv4_block26_2_conv False 413 conv4_block26_2_bn False 414 conv4_block26_2_relu False 415 conv4_block26_3_conv False 416 conv4_block26_out False 417 conv4_block27_preact_bn False 418 conv4_block27_preact_relu False 419 conv4_block27_1_conv False 420 conv4_block27_1_bn False 421 conv4_block27_1_relu False 422 conv4_block27_2_pad False 423 conv4_block27_2_conv False 424 conv4_block27_2_bn False 425 conv4_block27_2_relu False 426 conv4_block27_3_conv False 427 conv4_block27_out False 428 conv4_block28_preact_bn False 429 conv4_block28_preact_relu False 430 conv4_block28_1_conv False 431 conv4_block28_1_bn False 432 conv4_block28_1_relu False 433 conv4_block28_2_pad False 434 conv4_block28_2_conv False 435 conv4_block28_2_bn False 436 conv4_block28_2_relu False 437 conv4_block28_3_conv False 438 conv4_block28_out False 439 conv4_block29_preact_bn False 440 conv4_block29_preact_relu False 441 conv4_block29_1_conv False 442 conv4_block29_1_bn False 443 conv4_block29_1_relu False 444 conv4_block29_2_pad False 445 conv4_block29_2_conv False 446 conv4_block29_2_bn False 447 conv4_block29_2_relu False 448 conv4_block29_3_conv False 449 conv4_block29_out False 450 conv4_block30_preact_bn False 451 conv4_block30_preact_relu False 452 conv4_block30_1_conv False 453 conv4_block30_1_bn False 454 conv4_block30_1_relu False 455 conv4_block30_2_pad False 456 conv4_block30_2_conv False 457 conv4_block30_2_bn False 458 conv4_block30_2_relu False 459 conv4_block30_3_conv False 460 conv4_block30_out False 461 conv4_block31_preact_bn False 462 
conv4_block31_preact_relu False 463 conv4_block31_1_conv False 464 conv4_block31_1_bn False 465 conv4_block31_1_relu False 466 conv4_block31_2_pad False 467 conv4_block31_2_conv False 468 conv4_block31_2_bn False 469 conv4_block31_2_relu False 470 conv4_block31_3_conv False 471 conv4_block31_out False 472 conv4_block32_preact_bn False 473 conv4_block32_preact_relu False 474 conv4_block32_1_conv False 475 conv4_block32_1_bn False 476 conv4_block32_1_relu False 477 conv4_block32_2_pad False 478 conv4_block32_2_conv False 479 conv4_block32_2_bn False 480 conv4_block32_2_relu False 481 conv4_block32_3_conv False 482 conv4_block32_out False 483 conv4_block33_preact_bn False 484 conv4_block33_preact_relu False 485 conv4_block33_1_conv False 486 conv4_block33_1_bn False 487 conv4_block33_1_relu False 488 conv4_block33_2_pad False 489 conv4_block33_2_conv False 490 conv4_block33_2_bn False 491 conv4_block33_2_relu False 492 conv4_block33_3_conv False 493 conv4_block33_out False 494 conv4_block34_preact_bn False 495 conv4_block34_preact_relu False 496 conv4_block34_1_conv False 497 conv4_block34_1_bn False 498 conv4_block34_1_relu False 499 conv4_block34_2_pad False 500 conv4_block34_2_conv False 501 conv4_block34_2_bn False 502 conv4_block34_2_relu False 503 conv4_block34_3_conv False 504 conv4_block34_out False 505 conv4_block35_preact_bn False 506 conv4_block35_preact_relu False 507 conv4_block35_1_conv False 508 conv4_block35_1_bn False 509 conv4_block35_1_relu False 510 conv4_block35_2_pad False 511 conv4_block35_2_conv False 512 conv4_block35_2_bn False 513 conv4_block35_2_relu False 514 conv4_block35_3_conv False 515 conv4_block35_out False 516 conv4_block36_preact_bn False 517 conv4_block36_preact_relu False 518 conv4_block36_1_conv False 519 conv4_block36_1_bn False 520 conv4_block36_1_relu False 521 conv4_block36_2_pad False 522 conv4_block36_2_conv False 523 conv4_block36_2_bn False 524 conv4_block36_2_relu False 525 max_pooling2d_5 False 526 conv4_block36_3_conv 
False 527 conv4_block36_out False 528 conv5_block1_preact_bn False 529 conv5_block1_preact_relu False 530 conv5_block1_1_conv False 531 conv5_block1_1_bn False 532 conv5_block1_1_relu False 533 conv5_block1_2_pad False 534 conv5_block1_2_conv False 535 conv5_block1_2_bn False 536 conv5_block1_2_relu False 537 conv5_block1_0_conv False 538 conv5_block1_3_conv False 539 conv5_block1_out False 540 conv5_block2_preact_bn False 541 conv5_block2_preact_relu False 542 conv5_block2_1_conv False 543 conv5_block2_1_bn False 544 conv5_block2_1_relu False 545 conv5_block2_2_pad False 546 conv5_block2_2_conv False 547 conv5_block2_2_bn False 548 conv5_block2_2_relu False 549 conv5_block2_3_conv False 550 conv5_block2_out False 551 conv5_block3_preact_bn True 552 conv5_block3_preact_relu True 553 conv5_block3_1_conv True 554 conv5_block3_1_bn True 555 conv5_block3_1_relu True 556 conv5_block3_2_pad True 557 conv5_block3_2_conv True 558 conv5_block3_2_bn True 559 conv5_block3_2_relu True 560 conv5_block3_3_conv True 561 conv5_block3_out True 562 post_bn True 563 post_relu True
# Re-compile after unfreezing so the new trainable flags take effect; a much
# smaller learning rate keeps fine-tuning from wrecking the pre-trained weights.
model_pretrained.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=2e-6),
    metrics='binary_accuracy',
)
model_pretrained.summary()
Model: "model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_1 (InputLayer) [(None, 224, 224, 3)] 0 _________________________________________________________________ resnet152v2 (Functional) (None, 7, 7, 2048) 58331648 _________________________________________________________________ global_average_pooling2d (Gl (None, 2048) 0 _________________________________________________________________ dense (Dense) (None, 128) 262272 _________________________________________________________________ dropout (Dropout) (None, 128) 0 _________________________________________________________________ dense_1 (Dense) (None, 1) 129 ================================================================= Total params: 58,594,049 Trainable params: 4,731,137 Non-trainable params: 53,862,912 _________________________________________________________________
# Fine-tune: train the last 13 backbone layers together with the head.
# NOTE(review): steps_per_epoch/validation_steps are floats; Keras expects
# integers -- the log shows 130 steps, so this presumably truncates, but
# int(np.ceil(...)) would be explicit. TODO confirm.
history2 = model_pretrained.fit(ds_train,
batch_size = BATCH, epochs = 50,
validation_data=ds_val,
callbacks=[early_stopping, plateau],
steps_per_epoch=(len(train_df)/BATCH),
validation_steps=(len(val_df)/BATCH));
Epoch 1/50 130/130 [==============================] - 113s 867ms/step - loss: 0.1241 - binary_accuracy: 0.9591 - val_loss: 0.1195 - val_binary_accuracy: 0.9589 Epoch 2/50 130/130 [==============================] - 112s 860ms/step - loss: 0.1203 - binary_accuracy: 0.9587 - val_loss: 0.1127 - val_binary_accuracy: 0.9580 Epoch 3/50 130/130 [==============================] - 113s 867ms/step - loss: 0.1194 - binary_accuracy: 0.9565 - val_loss: 0.1116 - val_binary_accuracy: 0.9608 Epoch 4/50 130/130 [==============================] - 113s 861ms/step - loss: 0.1109 - binary_accuracy: 0.9627 - val_loss: 0.1107 - val_binary_accuracy: 0.9599 Epoch 5/50 130/130 [==============================] - 114s 866ms/step - loss: 0.1082 - binary_accuracy: 0.9632 - val_loss: 0.1058 - val_binary_accuracy: 0.9618 Epoch 6/50 130/130 [==============================] - 114s 875ms/step - loss: 0.1044 - binary_accuracy: 0.9637 - val_loss: 0.1034 - val_binary_accuracy: 0.9628 Epoch 7/50 130/130 [==============================] - 114s 869ms/step - loss: 0.1049 - binary_accuracy: 0.9656 - val_loss: 0.1029 - val_binary_accuracy: 0.9628 Epoch 8/50 130/130 [==============================] - 114s 872ms/step - loss: 0.1026 - binary_accuracy: 0.9627 - val_loss: 0.1014 - val_binary_accuracy: 0.9628 Epoch 9/50 130/130 [==============================] - 114s 871ms/step - loss: 0.0980 - binary_accuracy: 0.9670 - val_loss: 0.1003 - val_binary_accuracy: 0.9628 Epoch 10/50 130/130 [==============================] - 113s 861ms/step - loss: 0.1017 - binary_accuracy: 0.9634 - val_loss: 0.1003 - val_binary_accuracy: 0.9628 Epoch 11/50 130/130 [==============================] - 113s 867ms/step - loss: 0.1007 - binary_accuracy: 0.9639 - val_loss: 0.0956 - val_binary_accuracy: 0.9637 Epoch 12/50 130/130 [==============================] - 114s 868ms/step - loss: 0.0936 - binary_accuracy: 0.9675 - val_loss: 0.0983 - val_binary_accuracy: 0.9618 Epoch 13/50 130/130 [==============================] - 115s 876ms/step - 
loss: 0.0946 - binary_accuracy: 0.9685 - val_loss: 0.0944 - val_binary_accuracy: 0.9628 Epoch 14/50 130/130 [==============================] - 114s 870ms/step - loss: 0.0924 - binary_accuracy: 0.9687 - val_loss: 0.0960 - val_binary_accuracy: 0.9628 Epoch 15/50 130/130 [==============================] - 113s 865ms/step - loss: 0.0878 - binary_accuracy: 0.9701 - val_loss: 0.0915 - val_binary_accuracy: 0.9647 Epoch 16/50 130/130 [==============================] - 114s 870ms/step - loss: 0.0854 - binary_accuracy: 0.9685 - val_loss: 0.0903 - val_binary_accuracy: 0.9666 Epoch 17/50 130/130 [==============================] - 119s 907ms/step - loss: 0.0847 - binary_accuracy: 0.9716 - val_loss: 0.0919 - val_binary_accuracy: 0.9628 Epoch 18/50 130/130 [==============================] - 112s 857ms/step - loss: 0.0868 - binary_accuracy: 0.9704 - val_loss: 0.0924 - val_binary_accuracy: 0.9637 Epoch 00018: ReduceLROnPlateau reducing learning rate to 3.999999989900971e-07. Epoch 19/50 130/130 [==============================] - 113s 865ms/step - loss: 0.0881 - binary_accuracy: 0.9692 - val_loss: 0.0923 - val_binary_accuracy: 0.9628 Epoch 20/50 130/130 [==============================] - 112s 858ms/step - loss: 0.0842 - binary_accuracy: 0.9708 - val_loss: 0.0917 - val_binary_accuracy: 0.9628 Epoch 00020: ReduceLROnPlateau reducing learning rate to 8.00000009348878e-08. Epoch 21/50 130/130 [==============================] - 114s 870ms/step - loss: 0.0890 - binary_accuracy: 0.9692 - val_loss: 0.0909 - val_binary_accuracy: 0.9637
# Learning curves for the fine-tuning run (history2): loss then accuracy.
ft_epochs = history2.epoch

fig, ax = plt.subplots(figsize=(20, 8))
sns.lineplot(x=ft_epochs, y=history2.history['loss'])
sns.lineplot(x=ft_epochs, y=history2.history['val_loss'])
ax.set_title('Learning Curve (Loss)')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.set_ylim(0, 0.3)
ax.legend(['train', 'val'], loc='best')
plt.show()

fig, ax = plt.subplots(figsize=(20, 8))
sns.lineplot(x=ft_epochs, y=history2.history['binary_accuracy'])
sns.lineplot(x=ft_epochs, y=history2.history['val_binary_accuracy'])
ax.set_title('Learning Curve (Accuracy)')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylim(0.90, 1.0)
ax.legend(['train', 'val'], loc='best')
plt.show()
# Validation metrics after fine-tuning.
score = model_pretrained.evaluate(ds_val, steps=len(val_df) / BATCH, verbose=0)
print(f'Val loss: {score[0]}')
print(f'Val accuracy: {score[1]}')
Val loss: 0.09034226089715958 Val accuracy: 0.9665711522102356
# Evaluate the fine-tuned model on the test set.
# NOTE(review): this rebinds `score` (previously the validation scores), and
# steps = len(df_test) counts samples, not batches -- confirm intent.
score = model_pretrained.evaluate(ds_test, steps = len(df_test), verbose = 0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
Test loss: 0.3552650809288025 Test accuracy: 0.8766025900840759
Il s'agit des résultats les plus robustes et les meilleurs que nous ayons obtenus jusqu'à présent, avec une précision de test ≈ 0,88 et une précision de validation ≈ 0,97. Bien sûr, nous avions une meilleure précision avec la version précédente, mais elle était surajustée ! La précision d'apprentissage ne signifie pas une belle réussite si la précision de validation n'a pas la même performance.
Mesures de la performance
# Map class names to binary labels and score the fine-tuned model on the test set.
num_label = {'Normal': 0, 'Pneumonia' : 1}
# .map() already returns a new Series, so the original .copy() was redundant.
Y_test = df_test['class'].map(num_label).astype('int')
# NOTE(review): .reset() exists on Keras ImageDataGenerator iterators (rewinds
# to the first batch so predictions line up with df_test order) -- confirm
# ds_test's actual type.
ds_test.reset()
predictions = model_pretrained.predict(ds_test, steps=len(ds_test), verbose=0)
pred_labels = np.where(predictions > 0.5, 1, 0)
print("Test Accuracy: ", accuracy_score(Y_test, pred_labels))
Test Accuracy: 0.8766025641025641
Métriques d'évaluation des modèles
Ensuite, nous obtenons une mesure de la performance de notre modèle en évaluant plusieurs métriques des prédictions par rapport aux étiquettes cibles réelles.
La précision n'est pas une bonne mesure d'évaluation lorsqu'il y a un grand déséquilibre entre les classes de données. Imaginons que nous ayons 100 échantillons : 99 pneumonies et 1 normal, alors un modèle qui prédit tout comme pneumonie aura une précision de 99%. Dans ce cas, il est préférable d'examiner la précision (precision) et le rappel, ainsi que leur moyenne harmonique, le score F1.
# Confusion matrix + per-class report for the fine-tuned model on the test set.
confusion_matrix = metrics.confusion_matrix(Y_test, pred_labels)
ax = sns.heatmap(confusion_matrix, annot=True, fmt="d")
ax.set_xlabel("Predicted Label", fontsize=12)
ax.set_ylabel("True Label", fontsize=12)
plt.show()
print(metrics.classification_report(Y_test, pred_labels, labels=[0, 1]))
precision recall f1-score support
0 0.96 0.70 0.81 234
1 0.85 0.98 0.91 390
accuracy 0.88 624
macro avg 0.90 0.84 0.86 624
weighted avg 0.89 0.88 0.87 624
# ROC curve and AUC computed from the raw sigmoid probabilities (not the
# thresholded hard labels), which is what a ranking metric requires.
roc_auc = metrics.roc_auc_score(Y_test, predictions)
print('ROC_AUC: ', roc_auc)
fpr, tpr, thresholds = metrics.roc_curve(Y_test, predictions)
plt.plot(fpr, tpr, label=f'ROC_AUC = {roc_auc:.3f}')
plt.xlabel("False Positive Rate", fontsize=12)
plt.ylabel("True Positive Rate", fontsize=12)
plt.legend(loc="lower right")
plt.show()
ROC_AUC: 0.958809993425378
# Save the best model, then load the test images manually for visual inspection.
model_pretrained.save("/kaggle/working/bestmodel")

test_dir = "../input/chest-xray-pneumonia/chest_xray/test"
test_data, test_labels = [], []
# Class subfolders paired with their binary labels (replaces the original
# if/elif inside the loop). Also fixes the loop variable `image`, which
# shadowed the `tensorflow.keras.preprocessing.image` module imported at the
# top of the file.
for sub, label in [("/NORMAL/", 0), ("/PNEUMONIA/", 1)]:
    for fname in os.listdir(test_dir + sub):
        img = plt.imread(test_dir + sub + fname)
        img = cv2.resize(img, (224, 224))
        # Replicate the single channel 3x to match the ResNet input (224, 224, 3).
        # NOTE(review): assumes every file decodes to a 2-D grayscale array;
        # an RGB file would yield 9 channels here -- confirm the dataset.
        img = np.dstack([img, img, img])
        test_data.append(img.astype("float32") / 255)
        test_labels.append(label)
test_data = np.array(test_data)
test_labels = np.array(test_labels)
# Hard 0/1 labels for the best model via vectorized thresholding (replaces the
# original element-by-element append loop; same >= 0.5 cutoff).
preds = model_pretrained.predict(ds_test)
predictions = np.where(preds >= 0.5, 1, 0).ravel().tolist()
predictions[:15]
[0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0]
# Indices where the prediction agrees / disagrees with the ground truth.
pred_arr = np.asarray(predictions)
correct = np.nonzero(pred_arr == test_labels)[0]
incorrect_predictions = np.nonzero(pred_arr != test_labels)[0]
# Peek at the first few indices of each group.
print("Indices of correct predictions:\n", correct[:15])
print("\nIndices of incorrect predictions:\n", incorrect_predictions[:15])
Indices of correct predictions: [ 0 2 4 5 6 7 8 9 10 11 14 15 16 17 20] Indices of incorrect predictions: [ 1 3 12 13 18 19 22 24 25 26 31 32 33 35 47]
# BUG FIX: the original loops opened a brand-new figure on every iteration
# while placing panels with subplot(3, 1, i + 1), so each image landed in a
# different third of its own mostly-empty figure. Create one figure per group
# instead, and factor the duplicated correct/incorrect loops into one helper.
def _show_examples(indices):
    """Plot up to 3 test images (stacked vertically) with predicted vs actual class."""
    plt.figure(figsize=(8, 18), dpi=85)
    for row, a in enumerate(indices[:3]):
        plt.subplot(3, 1, row + 1)
        plt.xticks([]), plt.yticks([])
        plt.imshow(test_data[a].reshape(224, 224, 3))
        plt.title("Predicted class {} --- actual class {}".format(predictions[a], test_labels[a]))
    plt.tight_layout()
    plt.show()

_show_examples(correct)
_show_examples(incorrect_predictions)